In [ ]:
# Copyright 2020 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
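# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and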
# limitations under the License.
1. Familiar with Python
2. Completed Chapter I: Models by Design
1. Create a model design template
2. Construct the stem and classifier component
3. Construct the learner component
4. Construct blocks with max pooling
5. Construct blocks with feature pooling
6. Compare the training differences between max pooling and feature pooling
Let's create a model template based on the macro-architecture, which includes:
1. stem
2. learner
3. classifier
Fill in the blanks (replace each ??) and make sure the code passes the Python interpreter.
You will need to:
1. Add the activation function to the stem.
2. Pass the group parameters for each group to the group method.
3. Add global average pooling to the classifier.
In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Conv2D, ReLU, BatchNormalization, GlobalAveragePooling2D, MaxPooling2D
def stem(inputs):
    """Stem component: convolution, batch normalization, then an activation.

    Args:
        inputs: the input tensor to the model.

    Returns:
        The output tensor of the stem.
    """
    # 32 filters, 3x3 kernel, stride 1; 'same' padding keeps the spatial size
    outputs = Conv2D(32, (3, 3), strides=(1, 1), padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    # Add activation function
    # HINT: implement Conv-BN-ReLU (post-activation batch normalization)
    outputs = ??
    return outputs
def learner(inputs, groups):
    """Learner component: chain the groups one after another.

    Args:
        inputs: the input tensor to the learner.
        groups: a list of dicts, one per group, holding that group's parameters.

    Returns:
        The output tensor of the last group.
    """
    outputs = inputs
    for group_params in groups:
        # Pass the group parameters as python kwargs
        # HINT: remember the ** syntax for unpacking a dict into keyword arguments?
        outputs = group(outputs, ??)
    return outputs
def group(inputs, **blocks):
    """Placeholder group: passes the input tensor through unchanged.

    Args:
        inputs: the input tensor to the group.
        **blocks: the group parameters (not used by this placeholder).

    Returns:
        The input tensor, unmodified.
    """
    outputs = inputs
    for block in blocks:
        # No block is built in this template, so there is nothing to do per entry
        pass
    return outputs
def classifier(inputs, n_classes):
    """Classifier component: pool the feature maps, then apply a softmax dense layer.

    Args:
        inputs: the input tensor to the classifier.
        n_classes: the number of output classes (units in the dense layer).

    Returns:
        The output tensor of the softmax dense layer.
    """
    # Flatten and reduce the feature maps to a single pixel each.
    # HINT: it is a layer with 'Global' in its name.
    outputs = ??
    outputs = Dense(n_classes, activation='softmax')(outputs)
    return outputs
# Create the input tensor
inputs = Input((32, 32, 3))
# Assemble the components of the model
outputs = stem(inputs)
# Two groups: the first with one block, the second with two blocks
outputs = learner(outputs, [{'blocks': [{'n_filters': 64}]},
                            {'blocks': [{'n_filters': 128}, {'n_filters': 128}]}])
outputs = classifier(outputs, 10)
# Put the model together
model = Model(inputs, outputs)
It should look like below:
Model: "model"
Layer (type) Output Shape Param #
input_8 (InputLayer) [(None, 32, 32, 3)] 0
conv2d_6 (Conv2D) (None, 32, 32, 32) 896
batch_normalization_6 (Batch (None, 32, 32, 32) 128
re_lu_5 (ReLU) (None, 32, 32, 32) 0
global_average_pooling2d (Gl (None, 32) 0
dense (Dense) (None, 10) 330
Total params: 1,354
Trainable params: 1,290
Non-trainable params: 64
In [ ]:
Next, we will complete the learner component by:
1. Design the group method
2. Design the block method using max pooling
You will need to:
1. Extract the blocks parameters for the group.
2. Extract the number of filters parameter for the block.
3. Add max pooling in block to downsample size of feature maps.
In [ ]:
def group(inputs, **blocks):
    """Group: chain the blocks described by the group parameters.

    Args:
        inputs: the input tensor to the group.
        **blocks: the group parameters; the list of per-block parameter dicts
            is stored under the key 'blocks'.

    Returns:
        The output tensor of the last block.
    """
    outputs = inputs
    # Extract the blocks parameters from kwargs blocks
    # HINT: the parameter blocks is a dictionary, and 'blocks' is the key
    blocks = ??
    for block_params in blocks:
        # Each entry is a dict of parameters for one block
        outputs = block(outputs, **block_params)
    return outputs
def block(inputs, **block):
    """Block: Conv-BN-ReLU followed by max pooling.

    Args:
        inputs: the input tensor to the block.
        **block: the block parameters; 'n_filters' is the number of
            convolution filters.

    Returns:
        The output tensor of the max pooling layer.
    """
    # Extract the number of filters from the kwargs block
    # HINT: key is n_filters
    n_filters = ??
    # 3x3 convolution with stride 1; 'same' padding keeps the spatial size
    outputs = Conv2D(n_filters, (3, 3), strides=(1, 1), padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    # Add max pooling layer to reduce the size of each feature map by 75%
    # (height and width are both halved)
    # HINT: strides default to the pool size, here (2, 2), but you can specify it anyway
    outputs = MaxPooling2D((2, 2))(outputs)
    return outputs
# Create the input tensor
inputs = Input((32, 32, 3))
# Assemble stem, learner (two groups) and classifier
outputs = stem(inputs)
outputs = learner(outputs, [{'blocks': [{'n_filters': 64}]},
                            {'blocks': [{'n_filters': 128}, {'n_filters': 128}]}])
outputs = classifier(outputs, 10)
# Model built with the block definition currently in scope
model_a = Model(inputs, outputs)
It should look like below:
Layer (type) Output Shape Param #
input_2 (InputLayer) [(None, 32, 32, 3)] 0
conv2d_1 (Conv2D) (None, 32, 32, 32) 896
batch_normalization_1 (Batch (None, 32, 32, 32) 128
re_lu_1 (ReLU) (None, 32, 32, 32) 0
conv2d_2 (Conv2D) (None, 32, 32, 64) 18496
batch_normalization_2 (Batch (None, 32, 32, 64) 256
re_lu_2 (ReLU) (None, 32, 32, 64) 0
max_pooling2d (MaxPooling2D) (None, 16, 16, 64) 0
conv2d_3 (Conv2D) (None, 16, 16, 128) 73856
batch_normalization_3 (Batch (None, 16, 16, 128) 512
re_lu_3 (ReLU) (None, 16, 16, 128) 0
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128) 0
conv2d_4 (Conv2D) (None, 8, 8, 128) 147584
batch_normalization_4 (Batch (None, 8, 8, 128) 512
re_lu_4 (ReLU) (None, 8, 8, 128) 0
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 128) 0
global_average_pooling2d_1 ( (None, 128) 0
dense_1 (Dense) (None, 10) 1290
Total params: 243,530
Trainable params: 242,826
Non-trainable params: 704
In [ ]:
In [ ]:
def block(inputs, **block):
    """Block: Conv-BN-ReLU followed by a second (feature pooling) Conv-BN-ReLU.

    Args:
        inputs: the input tensor to the block.
        **block: the block parameters; 'n_filters' is the number of
            convolution filters.

    Returns:
        The output tensor of the final ReLU.
    """
    n_filters = block['n_filters']
    # 3x3 convolution with stride 1; 'same' padding keeps the spatial size
    outputs = Conv2D(n_filters, (3, 3), strides=(1, 1), padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    # Add a feature pooling convolution to reduce feature map size by 75%
    # HINT: It is in the strides and padding
    outputs = Conv2D(n_filters, (3, 3), ??)(outputs)
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    return outputs
# Create the input tensor
inputs = Input((32, 32, 3))
# Assemble stem, learner (two groups) and classifier
outputs = stem(inputs)
outputs = learner(outputs, [{'blocks': [{'n_filters': 64}]},
                            {'blocks': [{'n_filters': 128}, {'n_filters': 128}]}])
outputs = classifier(outputs, 10)
# Model built with the block definition currently in scope
model_b = Model(inputs, outputs)
It should look like below:
Layer (type) Output Shape Param #
input_25 (InputLayer) [(None, 32, 32, 3)] 0
conv2d_29 (Conv2D) (None, 32, 32, 32) 896
batch_normalization_29 (Batc (None, 32, 32, 32) 128
re_lu_28 (ReLU) (None, 32, 32, 32) 0
conv2d_30 (Conv2D) (None, 32, 32, 64) 18496
batch_normalization_30 (Batc (None, 32, 32, 64) 256
re_lu_29 (ReLU) (None, 32, 32, 64) 0
conv2d_31 (Conv2D) (None, 16, 16, 64) 36928
batch_normalization_31 (Batc (None, 16, 16, 64) 256
re_lu_30 (ReLU) (None, 16, 16, 64) 0
conv2d_32 (Conv2D) (None, 16, 16, 128) 73856
batch_normalization_32 (Batc (None, 16, 16, 128) 512
re_lu_31 (ReLU) (None, 16, 16, 128) 0
conv2d_33 (Conv2D) (None, 8, 8, 128) 147584
batch_normalization_33 (Batc (None, 8, 8, 128) 512
re_lu_32 (ReLU) (None, 8, 8, 128) 0
conv2d_34 (Conv2D) (None, 8, 8, 128) 147584
batch_normalization_34 (Batc (None, 8, 8, 128) 512
re_lu_33 (ReLU) (None, 8, 8, 128) 0
conv2d_35 (Conv2D) (None, 4, 4, 128) 147584
batch_normalization_35 (Batc (None, 4, 4, 128) 512
re_lu_34 (ReLU) (None, 4, 4, 128) 0
global_average_pooling2d_8 ( (None, 128) 0
dense_8 (Dense) (None, 10) 1290
Total params: 576,906
Trainable params: 575,562
Non-trainable params: 1,344
In [ ]:
In [ ]:
from tensorflow.keras.datasets import cifar10
import numpy as np
# Load CIFAR-10 as (training images, training labels), (test images, test labels)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Divide the image data by 255 and cast to float32
# (presumably to scale pixel values into [0, 1] -- confirm against the dataset)
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)
In [ ]:
# Configure the loss, optimizer and accuracy metric for training
model_a.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
# Train for 3 epochs, holding out 10% of the training data for validation
model_a.fit(x_train, y_train, epochs=3, batch_size=32, validation_split=0.1, verbose=1)
# Measure loss and accuracy on the test data
model_a.evaluate(x_test, y_test)
In [ ]:
# Configure the loss, optimizer and accuracy metric for training
model_b.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
# Train for 3 epochs, holding out 10% of the training data for validation
model_b.fit(x_train, y_train, epochs=3, batch_size=32, validation_split=0.1, verbose=1)
# Measure loss and accuracy on the test data
model_b.evaluate(x_test, y_test)
Since we replaced the max pooling with feature pooling, you will see a modest increase in training time, because the feature pooling layers must now be trained as well.
If you compare the two models epoch by epoch, you will generally see little difference in the first two epochs; after that, the accuracy of the feature pooling model gradually increases faster than that of the max pooling model. The delay is due to the fact that it takes a bit of training to teach the feature pooling layer how to best pool the feature maps.
In other words, the feature pooling layer starts out random, and thus worse than max pooling (a static algorithm), and gradually learns.